Gradient Descent

$ \begin{eqnarray} x_{t+1}&=&x_{t} - \eta \frac{\partial F}{\partial x} \\ x_{t+1} - x_{t}&=& -\eta \frac{\partial F}{\partial x} \\ \Delta x &=& -\eta \frac{\partial F}{\partial x} \end{eqnarray} $
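As a minimal sketch of this update rule (assuming, for illustration only, F(x) = x^2 so that dF/dx = 2x, and a step size eta = 0.1):

In [ ]:
# illustrative only: minimize F(x) = x**2 with the update x <- x - eta * dF/dx
eta = 0.1
xv = 5.0
for t in range(50):
    grad = 2*xv           # dF/dx for F(x) = x**2
    xv = xv - eta*grad    # Delta x = -eta * dF/dx
# xv ends up close to the minimizer x = 0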


In [9]:
%matplotlib inline
import numpy as np
import matplotlib.cm as cm
from matplotlib import pyplot as plt
plt.style.use('ggplot')

In [10]:
x=[]
yt=[]
# class 1: 20 points around (1, 1), one-hot target [0, 1]
for i in range(20):
    x.append( [0.5+np.random.rand(), 0.5+np.random.rand()])
    yt.append([0 , 1])

# class 0: 20 points around (0, 0), one-hot target [1, 0]
for i in range(20):
    x.append( [-0.5+np.random.rand(), -0.5+np.random.rand()])
    yt.append([1 , 0])

x=np.array(x)
yt=np.array(yt)

In [11]:
plt.plot( x[yt[:,1]==1,0], x[yt[:,1]==1,1], 'ob')
plt.plot( x[yt[:,0]==1,0], x[yt[:,0]==1,1], 'or')


Out[11]:
[<matplotlib.lines.Line2D at 0x8c7e860>]

Single Layer Perceptron

Error

$ \begin{eqnarray} E&=&\frac{1}{2} (y_p-y_t)^2\\ \end{eqnarray} $

Input and output

$ \begin{eqnarray} a_0 &\Leftarrow &X\\ y_p& \Leftarrow &a_1\\ \end{eqnarray} $

Forward Network

$ \begin{eqnarray} z_0&=&a_0.w_0+b_0\\ a_1&=&g(z_0)\\ \end{eqnarray} $
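A minimal NumPy sketch of this forward pass (the sample, weights, and bias here are placeholders; the actual w0, b0, and sigmoid g are defined in the cells below):

In [ ]:
# illustrative only: one forward pass through a single layer
a0_ex = np.array([[0.8, 0.6]])         # one input sample
w0_ex = np.zeros((2, 2))               # 2 inputs x 2 nodes (placeholder values)
b0_ex = np.zeros((1, 2))               # 1 bias per node
z0_ex = np.dot(a0_ex, w0_ex) + b0_ex   # z0 = a0.w0 + b0
a1_ex = 1/(1+np.exp(-z0_ex))           # a1 = g(z0), sigmoid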

Backpropagation on w0

$ \begin{eqnarray} \Delta w_0 &=& -\eta \frac{\partial E}{\partial w_0}\\ \frac{\partial E}{\partial w_0} &=& \frac{\partial E}{\partial y_p} \frac{\partial y_p}{\partial a_1} \frac{\partial a_1}{\partial z_0} \frac{\partial z_0}{\partial w_0} \\ \frac{\partial E}{\partial y_p} &=& y_p-y_t \\ \frac{\partial y_p}{\partial a_1} &=& 1 \\ \frac{\partial a_1}{\partial z_0} &=& \frac{\partial g(z_0)}{\partial z_0} \\ \frac{\partial z_0}{\partial w_0} &=& a_0 \\ \end{eqnarray} $
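Multiplying these factors gives the update used in the training cell below. As a sketch with placeholder values (for the sigmoid, da1/dz0 = a1(1 - a1)):

In [ ]:
# illustrative only: chain-rule factors combined into the w0 update
eta = 0.1
a0_ex = np.array([[0.8, 0.6]])               # input activation a0
a1_ex = np.array([[0.7, 0.3]])               # yp = a1 = g(z0)
yt_ex = np.array([[0.0, 1.0]])               # target
delta = (a1_ex - yt_ex)*a1_ex*(1 - a1_ex)    # dE/dyp * dyp/da1 * da1/dz0
d_w0  = -eta*np.dot(a0_ex.T, delta)          # Delta w0 = -eta * dE/dw0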

Backpropagation on b0

$ \begin{eqnarray} \Delta b_0 &=& -\eta \frac{\partial E}{\partial b_0}\\ \frac{\partial E}{\partial b_0} &=& \frac{\partial E}{\partial y_p} \frac{\partial y_p}{\partial a_1} \frac{\partial a_1}{\partial z_0} \frac{\partial z_0}{\partial b_0} \\ \frac{\partial E}{\partial y_p} &=& y_p-y_t \\ \frac{\partial y_p}{\partial a_1} &=& 1 \\ \frac{\partial a_1}{\partial z_0} &=& \frac{\partial g(z_0)}{\partial z_0} \\ \frac{\partial z_0}{\partial b_0} &=& 1 \\ \end{eqnarray} $
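In the code below, the bias is not updated separately: a column of ones is appended to the input so that b0 becomes the last row of the combined matrix wb0, and the same update then covers both w0 and b0. A small sketch of that equivalence (placeholder values):

In [ ]:
# illustrative only: folding the bias into the weight matrix
x_ex  = np.array([[0.8, 0.6]])
w0_ex = np.ones((2, 2))
b0_ex = np.full((1, 2), 0.5)
a0_ex  = np.concatenate((x_ex, np.ones((1, 1))), axis=1)   # append a 1 to the input
wb0_ex = np.concatenate((w0_ex, b0_ex), axis=0)            # stack the bias as the last row
# np.dot(a0_ex, wb0_ex) equals np.dot(x_ex, w0_ex) + b0_ex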


In [12]:
def g(x):
    # sigmoid activation
    return 1/(1+np.exp(-x))
def grad_g(g):
    # derivative of the sigmoid, expressed in terms of the activated value g(x)
    return (1-g)*g

# Alternative: tanh activation
# def g(x):
#     return np.tanh(x)
# def grad_g(g):
#     return 1-g*g
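Note that grad_g takes the already-activated value g(x) rather than x itself, so the forward output a1 can be reused directly in the backward pass. A quick check: the sigmoid derivative at x = 0 is g(0)*(1 - g(0)) = 0.25.

In [ ]:
grad_g(g(0.0))   # 0.25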

In [13]:
# random init of weights and bias
np.random.seed(1)
a0 = np.concatenate((x, np.ones([x.shape[0],1])), axis=1)  # append a column of ones for the bias
w0 = np.random.random((2,2))  # [2x2] 2 inputs x 2 nodes
b0 = np.random.random((1,2))  # [1x2] 1 bias x 2 nodes
wb0 = np.concatenate((w0, b0), axis=0)  # [3x2] weights and bias stacked together
for i in range(100):
    # forward: x=a0, a1=yp
    z0 = np.dot(a0, wb0)
    a1 = g(z0)
    # backward, step by step:
#     d_a1=yt-a1
#     d_z0=d_a1*grad_g(a1)
#     d_wb0 = np.dot(a0.T,d_z0)

    # backward, written out with the sigmoid derivative and learning rate eta = 0.1
    yp = a1
    d_wb0 = -0.1*np.dot(a0.T, (yp-yt)*(1-yp)*yp)

    wb0 += d_wb0
    if (i % 10) == 0:   # only print the error every 10 steps
        E = 0.5*np.sum(np.square(yp-yt))
        print("Error: {}".format(E))


Error: 9.52605764484
Error: 2.83555758727
Error: 1.74763155731
Error: 1.30463177781
Error: 1.06122270133
Error: 0.905388907629
Error: 0.796020339922
Error: 0.71442376241
Error: 0.650839862943
Error: 0.599655872043

In [14]:
a0.shape


Out[14]:
(40L, 3L)

In [15]:
wb0.shape


Out[15]:
(3L, 2L)

In [16]:
x.shape


Out[16]:
(40L, 2L)

In [17]:
my,mx=np.mgrid[slice(-1,2,0.01),slice(-1,2,0.01)]

In [18]:
out = np.zeros(mx.shape)
for i in range(mx.shape[0]):
    for j in range(mx.shape[1]):
        u = [mx[i,j], my[i,j], 1]        # grid point with the bias input appended
        # forward pass through the trained layer
        hot = g(np.dot(u, wb0))
        out[i,j] = hot[1]-hot[0]         # difference between the two output nodes
plt.pcolor(mx, my, out, cmap=cm.RdYlBu)  # decision surface
plt.colorbar()
plt.plot( x[yt[:,1]==1,0], x[yt[:,1]==1,1], 'ob')
plt.plot( x[yt[:,0]==1,0], x[yt[:,0]==1,1], 'or')


Out[18]:
[<matplotlib.lines.Line2D at 0x9e18ba8>]
